from openai import OpenAI

# Configure the client for the local API endpoint: base_url points at the
# service on localhost:8111, and api_key is the placeholder string "none".
client = OpenAI(base_url="http://localhost:8111/v1", api_key="none")


# Streaming call: with stream=True the reply is consumed below as a sequence
# of chunks instead of a single completed message.
response = client.chat.completions.create(
    model="Qwen2.5-0.5B-Instruct",
    messages=[{"role": "user", "content": "介绍一下人工智能"}],
    stream=True,  # enable streaming
)

# Print each text fragment as soon as it arrives, without a newline between
# fragments; flush so the text shows up immediately.
for chunk in response:
    # A chunk can arrive with an empty `choices` list (e.g. a usage-only
    # chunk from some servers); indexing [0] on it would raise IndexError.
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    # Skip chunks whose delta has no text (content is None or empty).
    if text:
        print(text, end="", flush=True)
